#### MAKE SIMULATION FIGURES ####

#### Packages and global options ####
library(tidyverse) # Data manipulation
library(here) # relative path management

# ggplot global options
theme_set(theme_bw(base_size = 20))

#### Load data ####
load(here("dle_sims_deflation.rda"))
load(here("dle_sims_inflation.rda"))
load(here("dle_sims_large.rda"))
load(here("cost_sim.rda"))

# Merge deflation and inflation
sims_main = rbind(
  sims %>% mutate(type = "Deflation"),
  sims_inf %>% mutate(type = "Inflation")
)

#### Figure 2 ####
# Statistical power under response deflation and inflation

# Recode rho labels
sims_main = sims_main %>% 
  mutate(rho = recode(as.factor(rho),
                      "0" = "rho == 0",
                      "0.4" = "rho == 0.4",
                      "0.8" = "rho == 0.8"))

# can use inquiry to distinguish tests from estimators
with(sims_main, table(inquiry, estimator, useNA = "ifany"))

# Calculate power
power_df = sims_main %>% 
  filter(!is.na(m)) %>% 
  group_by(type, gamma, rho,  m) %>% 
  summarize(power = mean(p.value <= 0.05, na.rm = TRUE))

# Visualize
power_df = sims_main %>% 
  filter(m == 10 | estimator == "diff_in_prevalence") %>% 
  group_by(type, estimator, gamma, rho) %>% 
  summarize(power = mean(p.value <= 0.05, na.rm = TRUE))

power_plot = ggplot(power_df) +
  aes(x = gamma, y = power, linetype = estimator, shape = estimator) +
  geom_vline(xintercept = 0.15, linetype = "dotted") +
  geom_line(size = 1) + geom_point(size = 2) +
  facet_grid(type ~ rho, label = "label_parsed") +
  scale_x_continuous(breaks = seq(0, 1, by = 0.2)) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.2)) +
  scale_linetype_discrete(name = "Test",
                          labels = c("Difference in differences",
                                     expression(paste('Signed rank (',
                                                      m == 10, ')')))) +
  scale_shape_discrete(name = "Test",
                       labels = c("Difference in differences",
                                  expression(paste('Signed rank (',
                                                   m == 10, ')')))) +
  theme(legend.position = "bottom") +
  labs(x = expression(paste('Proportion of unintended responses (',gamma,')')),
       y = expression(paste('Power at ', alpha  == 0.05)))

power_plot

#### Figure D3 ####
# Bias induced by deflation and deflation across estimators

# Calculate bias
bias_df = sims_main %>% 
  filter(inquiry == "prevalence_rate") %>% 
  group_by(type, estimator, gamma, rho) %>% 
  summarize(bias = mean(estimate-estimand))

# Visualize
bias_rho = ggplot(bias_df) +
  aes(x = gamma, y = bias, linetype = estimator) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_smooth(method = "loess", se = FALSE, size = 1, color = "black") +
  labs(x = expression(paste('Proportion of unintended responses (',gamma,')')),
       y = "Bias\nMean(estimate - estimand)",
       linetype = "Estimator") +
  facet_grid(type ~ rho, label = "label_parsed", scales = "free_y") +
  theme(legend.position = "bottom")

bias_rho

#### Figure D4 ####
# Power of Stephenson’s signed rank test under additional subset sizes

# Calculate power
power_steph_df = sims_main %>% 
  filter(!is.na(m)) %>% 
  group_by(type, gamma, rho,  m) %>% 
  summarize(power = mean(p.value <= 0.05, na.rm = TRUE))


# Visualize
power_steph = ggplot(power_steph_df) +
  aes(x = gamma, y = power, linetype = as.factor(m), 
      shape =as.factor(m)) +
  geom_vline(xintercept = 0.15, linetype = "dotted") +
  geom_hline(yintercept = 0.8, linetype = "dotted") +
  geom_line(size = 1) + geom_point(size = 2) +
  facet_grid(type ~ rho, label = "label_parsed") +
  scale_y_continuous(breaks = seq(0, 1, by = 0.2)) +
  theme(legend.position = "bottom") +
  labs(x = expression(paste('Proportion of unintended responses (',gamma,')')),
       y = expression(paste('Power at ', alpha  == 0.05)),
       linetype = expression(paste('Subset size (', m, ')')),
       shape = expression(paste('Subset size (', m, ')')))

power_steph

#### Figure D5 ####
# Power at increasing magnitudes of response deflation

# Recode parameter labels
sims_lrg = sims_lrg %>% 
  mutate(rho = recode(as.factor(rho),
                      "0" = "rho == 0",
                      "0.4" = "rho == 0.4",
                      "0.8" = "rho == 0.8"),
         tau = recode(as.factor(tau),
                      "1" = "delta == 1",
                      "2" = "delta == 2",
                      "3" = "delta == 3"))

# Calculate power
power_lrg_df = sims_lrg %>% 
  filter(m == 10 | estimator == "diff_in_prevalence") %>% 
  group_by(estimator, gamma, rho, tau) %>% 
  summarize(power = mean(p.value <= 0.05, na.rm = TRUE))

# Visualize
power_lrg = ggplot(power_lrg_df) +
  aes(x = gamma, y = power, linetype = estimator, shape = estimator) +
  geom_vline(xintercept = 0.15, linetype = "dotted") +
  geom_line(size = 1) + geom_point(size = 2) +
  facet_grid(tau ~ rho, label = "label_parsed") +
  scale_x_continuous(breaks = seq(0, 1, by = 0.2)) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.2)) +
  scale_linetype_discrete(name = "Test",
                          labels = c("Difference in differences",
                                     expression(paste('Signed rank (',
                                                      m == 10, ')')))) +
  scale_shape_discrete(name = "Test",
                       labels = c("Difference in differences",
                                  expression(paste('Signed rank (',
                                                   m == 10, ')')))) +
  theme(legend.position = "bottom") +
  labs(x = expression(paste('Proportion of unintended responses (',gamma,')')),
       y = expression(paste('Power at ', alpha  == 0.05)))

power_lrg

#### Figure E1 ####
# Cost of implementing a DLE as a function of sample loss
# Assume that doing a DLE costs you some effective sample because of extra piloting

# calculate power
cost_df = cost_sim %>% 
  mutate(rho = recode(as.factor(rho),
                      "0" = "rho == 0",
                      "0.4" = "rho == 0.4",
                      "0.8" = "rho == 0.8")) %>% 
  group_by(estimator, rho, cost) %>% 
  summarize(power = mean(p.value <= 0.05, na.rm = TRUE))

# Reorder terms
cost_df$estimator = fct_relevel(cost_df$estimator,
                                "List A", "List B", "DLE")

# Visualize
cost_plot = ggplot(cost_df) +
  aes(x = cost, y = power, linetype = estimator, shape = estimator) + 
  geom_hline(yintercept = 0.8, linetype = "dotted") +
  labs(x = "Sample size loss",
       y = expression(paste('Power at ', alpha  == 0.05)),
       shape = "Design",
       linetype = "Design") +
  geom_line(size = 1) + geom_point(size = 2) +
  facet_grid(~ rho, label = "label_parsed") +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.2)) +
  theme(legend.position = "bottom")

cost_plot
